Purpose: Demonstrate model creation with the AWS boto3 SDK, using the Iris multi-class dataset

Steps:

  1. Upload Training Set and Training Schema files to S3
  2. Create Training Data Source, Evaluation Data Source
  3. Create MultiClass Model using Training Data Source
  4. Create Evaluation using Evaluation Data Source
  5. Get the evaluation status and print the performance metric (average F1 score)

In [1]:
import boto3
import os

In [2]:
# Local directory that holds the demo data files (per the original notes it is
# also where batch results are downloaded to).
# Built with os.path.join so the path uses the separator of the current OS;
# a hard-coded '..\..\Data\...' string only resolves on Windows.
data_path = os.path.join('..', '..', 'Data', 'ModelCreationDemo')
# Training data, its schema, and the batch test file.
# upload_files_to_s3 joins each of these names onto data_path.
training_file = 'iris_data_train.csv'
training_schema_file = 'iris_data_train.csv.schema'
batch_test_file = 'iris_data_classifier_test.csv'

In [3]:
# S3 destination for the demo files.
# Bucket names must be globally unique across all of AWS (not just your
# account), so replace this example value with a bucket you own.
s3_bucket_name = "ml-course"
s3_folder_name = "boto3_demo"
# Files end up under s3_bucket_name/s3_folder_name/

In [4]:
# Create a boto3 session bound to a named credentials profile and a region.
# Change profile_name to use a different profile.
# The region is where the ML models and files will be created.
#
# http://docs.aws.amazon.com/machine-learning/latest/dg/regions-and-endpoints.html
# When this was written, ML was available in:
#   US East (N. Virginia)  us-east-1
#   EU (Ireland)           eu-west-1
session = boto3.Session(
    profile_name='ml_user',
    region_name='us-east-1',
)
ml_client = session.client('machinelearning')

In [5]:
def upload_files_to_s3():
    """Upload the training, schema and batch test files to S3.

    Each file is read from ``data_path`` and stored in the bucket
    ``s3_bucket_name`` under the key ``s3_folder_name/<file name>``.
    The local path of every file is printed before it is uploaded.
    """
    bucket = session.resource('s3').Bucket(s3_bucket_name)
    for name in (training_file, training_schema_file, batch_test_file):
        local_path = os.path.join(data_path, name)
        print(local_path)
        # S3 keys always use '/', regardless of the local path separator.
        bucket.upload_file(local_path, s3_folder_name + '/' + name)

In [6]:
# Run the upload; the local path of each file is printed as it is processed.
upload_files_to_s3()


..\..\Data\ModelCreationDemo\iris_data_train.csv
..\..\Data\ModelCreationDemo\iris_data_train.csv.schema
..\..\Data\ModelCreationDemo\iris_data_classifier_test.csv

In [7]:
# Derived from
#https://github.com/awslabs/machine-learning-samples/blob/master/social-media/create-aml-model.py

def create_data_source(dataset_name,
                       s3_data_uri,
                       s3_schema_uri,
                       ds_type, percent_begin,
                       percent_end,
                       compute_statistics):
    """Create a data source from a sequential slice of a file in S3.

    Args:
        dataset_name: Prefix of the data source name; the percent range is
            appended to it.
        s3_data_uri: S3 URI of the data file.
        s3_schema_uri: S3 URI of the schema file.
        ds_type: Label used in the data source id ("ds-boto3-iris-<ds_type>")
            and in the printed message.
        percent_begin: Start of the slice, passed as "percentBegin".
        percent_end: End of the slice, passed as "percentEnd".
        compute_statistics: Passed through as ComputeStatistics.

    Returns:
        The response of ``ml_client.create_data_source_from_s3``.
    """
    # DataRearrangement is a JSON string; doubled braces are literal braces.
    rearrangement = (
        '{{"splitting":{{"percentBegin":{0},"percentEnd":{1},"strategy":"sequential"}}}}'
    ).format(percent_begin, percent_end)
    spec = {
        'DataLocationS3': s3_data_uri,
        'DataSchemaLocationS3': s3_schema_uri,
        'DataRearrangement': rearrangement,
    }
    display_name = "{0}_[percentBegin={1}, percentEnd={2}]".format(
        dataset_name, percent_begin, percent_end)

    result = ml_client.create_data_source_from_s3(
        DataSourceId="ds-boto3-iris-{0}".format(ds_type),
        DataSourceName=display_name,
        DataSpec=spec,
        ComputeStatistics=compute_statistics)

    print("Creating {0} datasource".format(ds_type))
    return result

In [8]:
# S3 URIs of the uploaded training data file and its schema file
s3_train_uri = "s3://{0}/{1}/{2}".format(
    s3_bucket_name, s3_folder_name, training_file)
s3_train_schema_uri = "s3://{0}/{1}/{2}".format(
    s3_bucket_name, s3_folder_name, training_schema_file)

In [9]:
# Display both URIs to verify them before creating the data sources.
s3_train_uri, s3_train_schema_uri


Out[9]:
('s3://ml-course/boto3_demo/iris_data_train.csv',
 's3://ml-course/boto3_demo/iris_data_train.csv.schema')

In [10]:
# Training data source: the 0-70 percent slice of the file, with statistics.
train_datasource = create_data_source(
    dataset_name='iris_training',
    s3_data_uri=s3_train_uri,
    s3_schema_uri=s3_train_schema_uri,
    ds_type='Training',
    percent_begin=0,
    percent_end=70,
    compute_statistics=True)


Creating Training datasource

In [11]:
# Evaluation data source: the 70-100 percent slice of the same file,
# without computing statistics.
eval_datasource = create_data_source(
    dataset_name='iris_evaluation',
    s3_data_uri=s3_train_uri,
    s3_schema_uri=s3_train_schema_uri,
    ds_type='Evaluation',
    percent_begin=70,
    percent_end=100,
    compute_statistics=False)


Creating Evaluation datasource

In [12]:
# Print the ids returned for the two data sources; they are used below when
# creating the model and the evaluation.
print(train_datasource['DataSourceId'])
print(eval_datasource['DataSourceId'])


ds-boto3-iris-Training
ds-boto3-iris-Evaluation

In [13]:
# Request a multiclass model trained on the training data source.
model_create_response = ml_client.create_ml_model(
    MLModelId='ml-iris-demo',
    MLModelName='ML model: iris-demo-from-code',
    MLModelType='MULTICLASS',
    TrainingDataSourceId=train_datasource['DataSourceId'],
)

In [14]:
# Display the raw create_ml_model response (model id plus response metadata).
model_create_response


Out[14]:
{'MLModelId': 'ml-iris-demo',
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '28',
   'content-type': 'application/x-amz-json-1.1',
   'date': 'Wed, 27 Sep 2017 01:34:06 GMT',
   'x-amzn-requestid': 'f4001b45-a323-11e7-b81a-45bd44b424f7'},
  'HTTPStatusCode': 200,
  'RequestId': 'f4001b45-a323-11e7-b81a-45bd44b424f7',
  'RetryAttempts': 0}}

In [15]:
# Look up the model's current status, one of:
# 'PENDING'|'INPROGRESS'|'FAILED'|'COMPLETED'|'DELETED'
ml_client.get_ml_model(
    MLModelId=model_create_response['MLModelId'])['Status']


Out[15]:
'PENDING'

In [16]:
# Request an evaluation of the model against the evaluation data source.
evaluation_response = ml_client.create_evaluation(
    EvaluationId='eval-iris-demo',
    EvaluationName='Eval ML model: iris-demo-from-code',
    MLModelId=model_create_response['MLModelId'],
    EvaluationDataSourceId=eval_datasource['DataSourceId'],
)

In [17]:
# Display the raw create_evaluation response (evaluation id plus response metadata).
evaluation_response


Out[17]:
{'EvaluationId': 'eval-iris-demo',
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '33',
   'content-type': 'application/x-amz-json-1.1',
   'date': 'Wed, 27 Sep 2017 01:34:07 GMT',
   'x-amzn-requestid': 'f469646a-a323-11e7-b81a-45bd44b424f7'},
  'HTTPStatusCode': 200,
  'RequestId': 'f469646a-a323-11e7-b81a-45bd44b424f7',
  'RetryAttempts': 0}}

In [18]:
# Fetch the evaluation record; its 'Status' is one of
# 'PENDING'|'INPROGRESS'|'FAILED'|'COMPLETED'|'DELETED'
eval_result = ml_client.get_evaluation(
    EvaluationId=evaluation_response['EvaluationId'])

In [19]:
# Status of the evaluation as of the get_evaluation call above.
eval_result['Status']


Out[19]:
'PENDING'

In [20]:
# Performance metrics of the evaluation. In the recorded output below,
# 'Properties' is empty and the evaluation status was still 'PENDING'.
eval_result['PerformanceMetrics']


Out[20]:
{'Properties': {}}

In [21]:
# Display the complete get_evaluation response.
eval_result


Out[21]:
{'CreatedAt': datetime.datetime(2017, 9, 27, 3, 33, 13, 462000, tzinfo=tzlocal()),
 'CreatedByIamUser': 'arn:aws:iam::952216537794:user/ml_user',
 'EvaluationDataSourceId': 'ds-boto3-iris-Evaluation',
 'EvaluationId': 'eval-iris-demo',
 'InputDataLocationS3': 's3://ml-course/boto3_demo/iris_data_train.csv',
 'LastUpdatedAt': datetime.datetime(2017, 9, 27, 3, 33, 13, 462000, tzinfo=tzlocal()),
 'MLModelId': 'ml-iris-demo',
 'Name': 'Eval ML model: iris-demo-from-code',
 'PerformanceMetrics': {'Properties': {}},
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '406',
   'content-type': 'application/x-amz-json-1.1',
   'date': 'Wed, 27 Sep 2017 01:34:07 GMT',
   'x-amzn-requestid': 'f4b5618d-a323-11e7-b81a-45bd44b424f7'},
  'HTTPStatusCode': 200,
  'RequestId': 'f4b5618d-a323-11e7-b81a-45bd44b424f7',
  'RetryAttempts': 0},
 'Status': 'PENDING'}